import pandas as pd
from scipy.interpolate import lagrange

# Workbook that contains the gaps, and where the filled copy is written.
inputfile = '../data/missing_data.xlsx'
outputfile = '../tmp/missing_data_processed.xlsx'

# header=None makes pandas treat the first spreadsheet row as data, so the
# resulting columns are labelled with integers rather than header text.
data = pd.read_excel(
    inputfile,
    header=None,
    engine='openpyxl',
)

def ployinterp_column(s, n, k=5):
    """Estimate the value of ``s`` at label ``n`` by Lagrange interpolation.

    The ``k`` labels immediately before ``n`` and the ``k`` labels
    immediately after it are looked up in ``s``; ``n`` itself is excluded.
    Labels that are absent from ``s`` (for example past either end of the
    series) or whose value is null are ignored.

    Args:
        s: pandas Series to interpolate in; presumably indexed by
            consecutive integers, since neighbours are addressed as
            ``n - k .. n + k``.
        n: Integer label at which the value is estimated.
        k: Number of neighbouring labels to consider on each side.

    Returns:
        The Lagrange polynomial through the remaining neighbour points,
        evaluated at ``n``.
    """
    # Neighbour labels on both sides of n, skipping n itself.
    neighbour_labels = [*range(n - k, n), *range(n + 1, n + 1 + k)]
    window = s.reindex(neighbour_labels)
    # Keep only the neighbours that actually carry a value.
    known = window.dropna()
    polynomial = lagrange(known.index, list(known))
    return polynomial(n)

# Fill every missing cell, column by column, with the Lagrange estimate
# computed from its neighbouring rows, then save the completed table.
for column in data.columns:
    # Row labels that are null in this column, captured before any fill so
    # the frame is not inspected while it is being modified.
    missing_rows = list(data.index[data[column].isnull()])
    for row in missing_rows:
        # Assign through .loc on the frame itself. The chained form
        # data[column][row] = ... writes to a temporary Series and leaves
        # `data` unchanged when pandas Copy-on-Write is active.
        # data[column] is re-read on every iteration, so values filled
        # earlier in the column feed into later estimates.
        data.loc[row, column] = ployinterp_column(data[column], row)

# header=False / index=False: write only the cell values, mirroring the
# header-less layout the input was read with.
data.to_excel(outputfile, engine='openpyxl', header=False, index=False)